# library import statements
import pandas as pd
import folium # https://github.com/python-visualization/folium/tree/master/examples
from folium.plugins import MarkerCluster #https://github.com/python-visualization/folium/tree/master/examples
import matplotlib.pyplot as plt
%matplotlib inline
# Load the mosquito-borne disease notifications and plot how many cases with
# ID_AGRAVO code 'A928' (presented below as Zika virus) were notified per month.

# Location of the dataset on the local machine.
path = '/home/lserra/Temp/'
filename = 'Mosquito.Borne.Disease.tar.gz'
# NOTE(review): the file name ends in .tar.gz but it is read as a plain gzip
# stream, so the tar member header may leak into the parsed header row --
# presumably why a 'Mosquito.Borne.Disease.csv' column shows up; confirm.
disease = pd.read_csv(path + filename, compression='gzip', header=0)

# Quick inspection of the raw data (displayed when run as notebook cells).
disease.head()
disease.columns
disease.dtypes

# Keep only the 'A928' rows *before* parsing dates, so that only the rows
# that are actually used get converted to datetimes.
is_a928 = disease['ID_AGRAVO'] == 'A928'
new_disease = disease.loc[is_a928, ['ID_AGRAVO', 'DT_NOTIFIC']].copy()
new_disease['DT_NOTIFIC'] = pd.to_datetime(new_disease['DT_NOTIFIC'])

# Display only: set_index returns a new frame, new_disease itself keeps
# DT_NOTIFIC as a regular column.
new_disease.set_index('DT_NOTIFIC')

# Number of cases notified per calendar month. Grouping on the raw
# DT_NOTIFIC values would give one count per distinct date, which does not
# match the "by Month" plot title, so the dates are collapsed to monthly
# periods first. The resulting index is still named 'DT_NOTIFIC'.
notification_month = new_disease['DT_NOTIFIC'].dt.to_period('M')
new_disease_counts = (
    new_disease.groupby(notification_month)[['ID_AGRAVO']]
    .count()
    .rename(columns={'ID_AGRAVO': 'DISEASES QTY'})
)
new_disease_counts.head()

new_disease_counts.plot(figsize=(15, 5))
plt.title(u'Zika Virus Diseases Qty by Month')
plt.ylabel(u'Diseases Qty')
plt.xlabel(u'Period')
plt.show()
As we can see, the volume of cases found and registered increases during the period from Jan/2016 to May/2016. This period includes the Brazilian summer. It is the period with the highest incidence of cases.
# Build the subset of 'A928' cases notified in March 2016 and draw one marker
# per case on a folium map.

# Filter by disease code first and take an explicit copy, so the column
# assignment below works on an independent frame (no SettingWithCopyWarning)
# and the full dataset does not have to be duplicated.
disease_rj = disease[disease['ID_AGRAVO'] == 'A928'].copy()

# Parse the dates *before* applying the date window: comparing the raw
# strings is lexicographic and would drop 31 March rows if the values carry
# a time component. The half-open window [1 Mar, 1 Apr) covers all of March.
disease_rj['DT_NOTIFIC'] = pd.to_datetime(disease_rj['DT_NOTIFIC'])
march_start = pd.Timestamp('2016-03-01')
april_start = pd.Timestamp('2016-04-01')
in_march = (disease_rj['DT_NOTIFIC'] >= march_start) & (disease_rj['DT_NOTIFIC'] < april_start)
disease_rj = disease_rj[in_march]

# NOTE(review): this column looks like an artifact of how the archive is
# read rather than real data -- confirm against the loading step.
disease_rj = disease_rj.drop('Mosquito.Borne.Disease.csv', axis=1)
disease_rj.set_index('DT_NOTIFIC').head()

# Total number of cases in the selected subset for March 2016.
disease_rj['ID_AGRAVO'].count()

# Map of the cases found and registered in March 2016 only; the map is
# centred on the fixed coordinates below.
disease_map = folium.Map(location=[-22.914921, -43.194043])

# Rows without coordinates cannot be placed on the map (folium rejects NaN
# locations), so they are skipped here; they still count in the total above.
located_cases = disease_rj[['latitude', 'longitude']].dropna()
for d in located_cases.values.tolist():
    folium.RegularPolygonMarker(
        location=d,
        popup='',
        fill_color='#769d96',
        number_of_sides=4,
        radius=5,
    ).add_to(disease_map)

# Last expression: renders the map when run as a notebook cell.
disease_map